library(tidyverse)
## ─ Attaching packages ─────────────── tidyverse 1.3.1 ─
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ─ Conflicts ───────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose

#STEP 1:

Read Data

data_2004 <- data.table::fread("~/Desktop/USC/PM566-Labs/Assignment_01/2004.csv")
data_2019 <- data.table::fread("~/Desktop/USC/PM566-Labs/Assignment_01/2019.csv")

Check dimension, headers, footers, variable names and types

Dimension

dim(data_2004)
## [1] 19233    20
dim(data_2019)
## [1] 53086    20

Headers

head(data_2004)
##          Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
## 1: 01/01/2004    AQS 60010007   1                           11.0 ug/m3 LC
## 2: 01/02/2004    AQS 60010007   1                           12.2 ug/m3 LC
## 3: 01/03/2004    AQS 60010007   1                           16.5 ug/m3 LC
## 4: 01/04/2004    AQS 60010007   1                           18.1 ug/m3 LC
## 5: 01/05/2004    AQS 60010007   1                           11.5 ug/m3 LC
## 6: 01/06/2004    AQS 60010007   1                           32.5 ug/m3 LC
##    DAILY_AQI_VALUE Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
## 1:              46 Livermore               1              100
## 2:              51 Livermore               1              100
## 3:              60 Livermore               1              100
## 4:              64 Livermore               1              100
## 5:              48 Livermore               1              100
## 6:              94 Livermore               1              100
##    AQS_PARAMETER_CODE                     AQS_PARAMETER_DESC CBSA_CODE
## 1:              88502 Acceptable PM2.5 AQI & Speciation Mass     41860
## 2:              88502 Acceptable PM2.5 AQI & Speciation Mass     41860
## 3:              88502 Acceptable PM2.5 AQI & Speciation Mass     41860
## 4:              88101               PM2.5 - Local Conditions     41860
## 5:              88502 Acceptable PM2.5 AQI & Speciation Mass     41860
## 6:              88502 Acceptable PM2.5 AQI & Speciation Mass     41860
##                            CBSA_NAME STATE_CODE      STATE COUNTY_CODE  COUNTY
## 1: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 2: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 3: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 4: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 5: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 6: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
##    SITE_LATITUDE SITE_LONGITUDE
## 1:      37.68753      -121.7842
## 2:      37.68753      -121.7842
## 3:      37.68753      -121.7842
## 4:      37.68753      -121.7842
## 5:      37.68753      -121.7842
## 6:      37.68753      -121.7842
head(data_2019)
##          Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
## 1: 01/01/2019    AQS 60010007   3                            5.7 ug/m3 LC
## 2: 01/02/2019    AQS 60010007   3                           11.9 ug/m3 LC
## 3: 01/03/2019    AQS 60010007   3                           20.1 ug/m3 LC
## 4: 01/04/2019    AQS 60010007   3                           28.8 ug/m3 LC
## 5: 01/05/2019    AQS 60010007   3                           11.2 ug/m3 LC
## 6: 01/06/2019    AQS 60010007   3                            2.7 ug/m3 LC
##    DAILY_AQI_VALUE Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
## 1:              24 Livermore               1              100
## 2:              50 Livermore               1              100
## 3:              68 Livermore               1              100
## 4:              86 Livermore               1              100
## 5:              47 Livermore               1              100
## 6:              11 Livermore               1              100
##    AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
## 1:              88101 PM2.5 - Local Conditions     41860
## 2:              88101 PM2.5 - Local Conditions     41860
## 3:              88101 PM2.5 - Local Conditions     41860
## 4:              88101 PM2.5 - Local Conditions     41860
## 5:              88101 PM2.5 - Local Conditions     41860
## 6:              88101 PM2.5 - Local Conditions     41860
##                            CBSA_NAME STATE_CODE      STATE COUNTY_CODE  COUNTY
## 1: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 2: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 3: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 4: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 5: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
## 6: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
##    SITE_LATITUDE SITE_LONGITUDE
## 1:      37.68753      -121.7842
## 2:      37.68753      -121.7842
## 3:      37.68753      -121.7842
## 4:      37.68753      -121.7842
## 5:      37.68753      -121.7842
## 6:      37.68753      -121.7842

Footers

tail(data_2004)
##          Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
## 1: 12/14/2004    AQS 61131003   1                             11 ug/m3 LC
## 2: 12/17/2004    AQS 61131003   1                             16 ug/m3 LC
## 3: 12/20/2004    AQS 61131003   1                             17 ug/m3 LC
## 4: 12/23/2004    AQS 61131003   1                              9 ug/m3 LC
## 5: 12/26/2004    AQS 61131003   1                             24 ug/m3 LC
## 6: 12/29/2004    AQS 61131003   1                              9 ug/m3 LC
##    DAILY_AQI_VALUE            Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
## 1:              46 Woodland-Gibson Road               1              100
## 2:              59 Woodland-Gibson Road               1              100
## 3:              61 Woodland-Gibson Road               1              100
## 4:              38 Woodland-Gibson Road               1              100
## 5:              76 Woodland-Gibson Road               1              100
## 6:              38 Woodland-Gibson Road               1              100
##    AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
## 1:              88101 PM2.5 - Local Conditions     40900
## 2:              88101 PM2.5 - Local Conditions     40900
## 3:              88101 PM2.5 - Local Conditions     40900
## 4:              88101 PM2.5 - Local Conditions     40900
## 5:              88101 PM2.5 - Local Conditions     40900
## 6:              88101 PM2.5 - Local Conditions     40900
##                                  CBSA_NAME STATE_CODE      STATE COUNTY_CODE
## 1: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 2: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 3: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 4: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 5: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 6: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
##    COUNTY SITE_LATITUDE SITE_LONGITUDE
## 1:   Yolo      38.66121      -121.7327
## 2:   Yolo      38.66121      -121.7327
## 3:   Yolo      38.66121      -121.7327
## 4:   Yolo      38.66121      -121.7327
## 5:   Yolo      38.66121      -121.7327
## 6:   Yolo      38.66121      -121.7327
tail(data_2019)
##          Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
## 1: 11/11/2019    AQS 61131003   1                           13.5 ug/m3 LC
## 2: 11/17/2019    AQS 61131003   1                           18.1 ug/m3 LC
## 3: 11/29/2019    AQS 61131003   1                           12.5 ug/m3 LC
## 4: 12/17/2019    AQS 61131003   1                           23.8 ug/m3 LC
## 5: 12/23/2019    AQS 61131003   1                            1.0 ug/m3 LC
## 6: 12/29/2019    AQS 61131003   1                            9.1 ug/m3 LC
##    DAILY_AQI_VALUE            Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
## 1:              54 Woodland-Gibson Road               1              100
## 2:              64 Woodland-Gibson Road               1              100
## 3:              52 Woodland-Gibson Road               1              100
## 4:              76 Woodland-Gibson Road               1              100
## 5:               4 Woodland-Gibson Road               1              100
## 6:              38 Woodland-Gibson Road               1              100
##    AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
## 1:              88101 PM2.5 - Local Conditions     40900
## 2:              88101 PM2.5 - Local Conditions     40900
## 3:              88101 PM2.5 - Local Conditions     40900
## 4:              88101 PM2.5 - Local Conditions     40900
## 5:              88101 PM2.5 - Local Conditions     40900
## 6:              88101 PM2.5 - Local Conditions     40900
##                                  CBSA_NAME STATE_CODE      STATE COUNTY_CODE
## 1: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 2: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 3: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 4: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 5: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
## 6: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
##    COUNTY SITE_LATITUDE SITE_LONGITUDE
## 1:   Yolo      38.66121      -121.7327
## 2:   Yolo      38.66121      -121.7327
## 3:   Yolo      38.66121      -121.7327
## 4:   Yolo      38.66121      -121.7327
## 5:   Yolo      38.66121      -121.7327
## 6:   Yolo      38.66121      -121.7327

Check Variables for 2004 Data

str(data_2004)
## Classes 'data.table' and 'data.frame':   19233 obs. of  20 variables:
##  $ Date                          : chr  "01/01/2004" "01/02/2004" "01/03/2004" "01/04/2004" ...
##  $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
##  $ Site ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
##  $ POC                           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Daily Mean PM2.5 Concentration: num  11 12.2 16.5 18.1 11.5 32.5 14 29.9 21 15.7 ...
##  $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
##  $ DAILY_AQI_VALUE               : int  46 51 60 64 48 94 55 88 70 59 ...
##  $ Site Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
##  $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ AQS_PARAMETER_CODE            : int  88502 88502 88502 88101 88502 88502 88101 88502 88502 88101 ...
##  $ AQS_PARAMETER_DESC            : chr  "Acceptable PM2.5 AQI & Speciation Mass" "Acceptable PM2.5 AQI & Speciation Mass" "Acceptable PM2.5 AQI & Speciation Mass" "PM2.5 - Local Conditions" ...
##  $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
##  $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
##  $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ STATE                         : chr  "California" "California" "California" "California" ...
##  $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
##  $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
##  $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
##  - attr(*, ".internal.selfref")=<externalptr>
summary(data_2004$`Daily Mean PM2.5 Concentration`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -0.10    6.00   10.10   13.13   16.30  251.00
table(data_2004$`Daily Mean PM2.5 Concentration`)
## 
##  -0.1     0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9     1   1.1 
##     1    11    14    20    27    32    35    41    43    40    31    94    39 
##   1.2   1.3   1.4   1.5   1.6   1.7   1.8   1.9     2   2.1   2.2   2.3   2.4 
##    40    37    31    30    43    35    37    35   112    59    44    40    46 
##   2.5   2.6   2.7   2.8   2.9     3   3.1   3.2   3.3   3.4   3.5   3.6   3.7 
##    53    58    48    58    64   197    61    63    88    56    73    82    78 
##   3.8   3.9     4   4.1   4.2   4.3   4.4   4.5   4.6   4.7   4.8   4.9     5 
##    70    60   365    68    74    74    79    93    63    75    88    80   425 
##   5.1   5.2   5.3   5.4   5.5   5.6   5.7   5.8   5.9     6   6.1   6.2   6.3 
##    86    94    98    83    97    75    92    86   104   431    85    96    98 
##   6.4   6.5   6.6   6.7   6.8   6.9     7   7.1   7.2   7.3   7.4   7.5   7.6 
##    87   100    68    96    81    89   411    86   101    83    91   101    64 
##   7.7   7.8   7.9     8   8.1   8.2   8.3   8.4   8.5   8.6   8.7   8.8   8.9 
##   123    97    85   399    70   114    87    98   112    85    98    95    90 
##     9   9.1   9.2   9.3   9.4   9.5   9.6   9.7   9.8   9.9    10  10.1  10.2 
##   355    81   105    81    84   100    70   107    91    73   275    98   101 
##  10.3  10.4  10.5  10.6  10.7  10.8  10.9    11  11.1  11.2  11.3  11.4  11.5 
##    86    71   109    77    95    86    93   268    70    90    96    72    83 
##  11.6  11.7  11.8  11.9    12  12.1  12.2  12.3  12.4  12.5  12.6  12.7  12.8 
##    75    75    71    60   228    74    76    71    79    75    83    66    72 
##  12.9    13  13.1  13.2  13.3  13.4  13.5  13.6  13.7  13.8  13.9    14  14.1 
##    55   170    67    74    62    56    68    64    58    59    49   136    63 
##  14.2  14.3  14.4  14.5  14.6  14.7  14.8  14.9    15  15.1  15.2  15.3  15.4 
##    64    55    52    45    52    70    50    57   136    50    59    57    52 
##  15.5  15.6  15.7  15.8  15.9    16  16.1  16.2  16.3  16.4  16.5  16.6  16.7 
##    53    46    48    44    53   127    37    51    48    48    50    41    38 
##  16.8  16.9    17  17.1  17.2  17.3  17.4  17.5  17.6  17.7  17.8  17.9    18 
##    47    32    96    37    37    32    46    42    35    37    40    25    87 
##  18.1  18.2  18.3  18.4  18.5  18.6  18.7  18.8  18.9    19  19.1  19.2  19.3 
##    36    38    24    18    33    39    32    23    29    69    30    17    30 
##  19.4  19.5  19.6  19.7  19.8  19.9    20  20.1  20.2  20.3  20.4  20.5  20.6 
##    19    40    37    21    18    28    85    27    33    29    20    25    25 
##  20.7  20.8  20.9    21  21.1  21.2  21.3  21.4  21.5  21.6  21.7  21.8  21.9 
##    24    21    19    65    22    16    17    15    23    19    19    15    11 
##    22  22.1  22.2  22.3  22.4  22.5  22.6  22.7  22.8  22.9    23  23.1  23.2 
##    39    27    16    17    14    22    18    20    18    15    55    22    13 
##  23.3  23.4  23.5  23.6  23.7  23.8  23.9    24  24.1  24.2  24.3  24.4  24.5 
##    22    20    16    17    16    20    12    47    19    19    14     6    17 
##  24.6  24.7  24.8  24.9    25  25.1  25.2  25.3  25.4  25.5  25.6  25.7  25.8 
##    13    14    12    14    53    18    14    14    11    16    13    18     7 
##  25.9    26  26.1  26.2  26.3  26.4  26.5  26.6  26.7  26.8  26.9    27  27.1 
##    12    50    16     8    14    13    14    17    10     9    12    43    14 
##  27.2  27.3  27.4  27.5  27.6  27.7  27.8  27.9    28  28.1  28.2  28.3  28.4 
##    22     8     3    13     7     6    11    10    38    23    20    14    10 
##  28.5  28.6  28.7  28.8  28.9    29  29.1  29.2  29.3  29.4  29.5  29.6  29.7 
##     9    10     8     7    17    30    12    10    12     6    15    21    13 
##  29.8  29.9    30  30.1  30.2  30.3  30.4  30.5  30.6  30.7  30.8  30.9    31 
##    11    10    39    14    15    11     9    10    11    13    10     5    29 
##  31.1  31.2  31.3  31.4  31.5  31.6  31.7  31.8  31.9    32  32.1  32.2  32.3 
##     6    16     4    10     7     1     8    16     8    27     7    12    12 
##  32.4  32.5  32.6  32.7  32.8  32.9    33  33.1  33.2  33.3  33.4  33.5  33.6 
##     9    10     9     7    11    10    31     4     4    10    12    12     6 
##  33.7  33.8  33.9    34  34.1  34.2  34.3  34.4  34.5  34.6  34.7  34.8  34.9 
##    11     7     4    35     3     7     4    11     7     7     4     5     9 
##    35  35.1  35.2  35.3  35.4  35.5  35.6  35.7  35.8  35.9    36  36.1  36.2 
##    23     7    15     5     5     8     6     5     3     2    24     7     9 
##  36.3  36.4  36.5  36.6  36.7  36.8  36.9    37  37.1  37.2  37.3  37.4  37.5 
##     3     5     8     6     8     7     5    19     5     5     8    10     4 
##  37.6  37.7  37.8  37.9    38  38.1  38.2  38.3  38.4  38.5  38.6  38.7  38.8 
##     5     4     7     3    24     3     8     4     7     7     3     6     5 
##  38.9    39  39.1  39.2  39.3  39.4  39.5  39.6  39.7  39.8  39.9    40  40.1 
##     3    17     6     7     3     6     4     6     6     3     4     7     8 
##  40.2  40.3  40.4  40.5  40.6  40.7  40.8  40.9    41  41.1  41.2  41.3  41.4 
##     6     4     8    11     1     3     7     3    12     5     7     5     2 
##  41.5  41.6  41.7  41.8  41.9    42  42.1  42.2  42.3  42.4  42.5  42.6  42.7 
##     9     3     5     3     5    10     5     6     6     5     4     5     5 
##  42.9    43  43.1  43.2  43.3  43.4  43.5  43.6  43.7  43.8  43.9    44  44.1 
##     8    11     5     6     6     7     3     2     5     1     3    12     4 
##  44.2  44.3  44.4  44.5  44.6  44.7  44.8  44.9    45  45.1  45.2  45.3  45.4 
##     3     4     2     2     6     4     3     5     6     1     1     1     4 
##  45.5  45.7  45.8  45.9    46  46.1  46.2  46.3  46.4  46.5  46.6  46.7  46.8 
##     4     7     3     3     3     3     2     1     3     7     2     4     1 
##  46.9    47  47.1  47.2  47.3  47.5  47.6  47.7  47.8  47.9    48  48.1  48.2 
##     5     8     2     5     2     2     3     3     4     2     3     1     2 
##  48.3  48.5  48.6  48.7  48.9    49  49.2  49.3  49.4  49.5  49.6  49.7  49.9 
##     2     3     1     4     2     5     1     3     5     1     2     5     1 
##    50  50.1  50.2  50.4  50.5  50.6  50.8  50.9    51  51.2  51.4  51.5  51.7 
##     4     4     1     2     1     3     1     2     5     4     1     3     2 
##  51.8  51.9    52  52.1  52.2  52.4  52.5  52.7  52.8  52.9    53  53.1  53.2 
##     1     1     4     2     1     2     3     1     1     3     5     1     2 
##  53.3  53.5  53.7  53.8  53.9    54  54.2  54.3  54.4  54.6  54.8  54.9    55 
##     1     1     1     3     1     2     2     2     1     2     2     2     3 
##  55.1  55.2  55.3  55.5  55.6  55.7  55.8    56  56.1  56.2  56.3  56.4  56.8 
##     1     1     1     2     1     1     3     1     1     2     1     3     1 
##    57  57.2  57.3  57.4  57.9  58.1  58.4  58.7  58.9  59.1  59.2  59.3  59.4 
##     4     1     3     1     1     1     2     1     2     1     2     1     1 
##  59.5  59.7  59.9    60  60.1  60.3  60.4  60.5  60.7  60.8  60.9    61  61.2 
##     2     2     2     2     1     1     1     1     1     2     1     3     1 
##  61.5  61.7  61.8  62.5  62.6  62.7  63.1  63.4  63.9    64  64.9    65  65.3 
##     1     1     2     2     1     1     1     1     1     1     1     2     1 
##  65.4  66.1  66.3  66.6  67.1  67.3  67.4  68.2  68.6  68.7  68.9    69  69.3 
##     2     3     2     2     1     1     3     1     1     1     1     2     1 
##    70  70.6    71  71.4  72.4  72.8  73.6  73.7  74.2  74.5    75  75.6  76.8 
##     1     2     1     2     1     2     2     1     1     1     1     1     1 
##  77.1  77.5  79.8  80.9    81  81.4  81.6  81.9  82.3    83  86.1  90.2  90.7 
##     1     1     1     1     1     1     2     1     1     2     1     1     1 
##  90.9  91.7  93.4  93.8  95.7 100.4 102.1 110.4 122.5 148.4 170.4   251 
##     1     1     1     1     1     1     1     1     1     1     1     1

I think that the concentration of PM2.5 should not be negative, so I need to remove all the data that is negative.

data_2004 <- data_2004[`Daily Mean PM2.5 Concentration`>=0]
summary(data_2004$`Daily Mean PM2.5 Concentration`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    6.00   10.10   13.13   16.30  251.00

Check Variables for 2019 Data

str(data_2019)
## Classes 'data.table' and 'data.frame':   53086 obs. of  20 variables:
##  $ Date                          : chr  "01/01/2019" "01/02/2019" "01/03/2019" "01/04/2019" ...
##  $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
##  $ Site ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
##  $ POC                           : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ Daily Mean PM2.5 Concentration: num  5.7 11.9 20.1 28.8 11.2 2.7 2.8 7 3.1 7.1 ...
##  $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
##  $ DAILY_AQI_VALUE               : int  24 50 68 86 47 11 12 29 13 30 ...
##  $ Site Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
##  $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
##  $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
##  $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
##  $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
##  $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ STATE                         : chr  "California" "California" "California" "California" ...
##  $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
##  $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
##  $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
##  - attr(*, ".internal.selfref")=<externalptr>
summary(data_2019$`Daily Mean PM2.5 Concentration`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.200   4.000   6.500   7.734   9.900 120.900
table(data_2019$`Daily Mean PM2.5 Concentration`)
## 
##  -2.2    -2  -1.9  -1.8  -1.7  -1.6  -1.5  -1.4  -1.3  -1.2  -1.1    -1  -0.9 
##     1    12    16    11    12    12     8    11    10    16     9    12    10 
##  -0.8  -0.7  -0.6  -0.5  -0.4  -0.3  -0.2  -0.1     0   0.1   0.2   0.3   0.4 
##    11    15     9    15    13    24    29    26    70    39    69    84    82 
##   0.5   0.6   0.7   0.8   0.9     1   1.1   1.2   1.3   1.4   1.5   1.6   1.7 
##   126   136   160   142   160   359   185   214   204   223   294   238   333 
##   1.8   1.9     2   2.1   2.2   2.3   2.4   2.5   2.6   2.7   2.8   2.9     3 
##   271   289   450   271   411   365   310   482   383   470   397   397   562 
##   3.1   3.2   3.3   3.4   3.5   3.6   3.7   3.8   3.9     4   4.1   4.2   4.3 
##   403   525   459   400   587   458   590   452   471   720   467   603   512 
##   4.4   4.5   4.6   4.7   4.8   4.9     5   5.1   5.2   5.3   5.4   5.5   5.6 
##   490   616   522   616   503   480   723   471   598   509   491   596   486 
##   5.7   5.8   5.9     6   6.1   6.2   6.3   6.4   6.5   6.6   6.7   6.8   6.9 
##   600   495   452   582   445   537   462   451   577   394   552   427   425 
##     7   7.1   7.2   7.3   7.4   7.5   7.6   7.7   7.8   7.9     8   8.1   8.2 
##   542   374   446   411   416   486   400   490   389   367   484   331   407 
##   8.3   8.4   8.5   8.6   8.7   8.8   8.9     9   9.1   9.2   9.3   9.4   9.5 
##   372   306   455   317   390   329   303   427   293   389   306   305   389 
##   9.6   9.7   9.8   9.9    10  10.1  10.2  10.3  10.4  10.5  10.6  10.7  10.8 
##   253   340   296   253   338   229   317   269   253   306   229   254   241 
##  10.9    11  11.1  11.2  11.3  11.4  11.5  11.6  11.7  11.8  11.9    12  12.1 
##   205   252   224   257   186   199   229   196   230   152   140   233   139 
##  12.2  12.3  12.4  12.5  12.6  12.7  12.8  12.9    13  13.1  13.2  13.3  13.4 
##   177   151   175   179   141   168   122   134   169   132   153   138   110 
##  13.5  13.6  13.7  13.8  13.9    14  14.1  14.2  14.3  14.4  14.5  14.6  14.7 
##   157   125   142   102   117   132    84   129    83   112   107    88   106 
##  14.8  14.9    15  15.1  15.2  15.3  15.4  15.5  15.6  15.7  15.8  15.9    16 
##    89    82   122    86   114    65    90    88    72    87    73    67    69 
##  16.1  16.2  16.3  16.4  16.5  16.6  16.7  16.8  16.9    17  17.1  17.2  17.3 
##    72    75    50    57    69    60    60    52    38    74    52    54    36 
##  17.4  17.5  17.6  17.7  17.8  17.9    18  18.1  18.2  18.3  18.4  18.5  18.6 
##    40    54    40    52    41    42    33    42    46    38    30    52    41 
##  18.7  18.8  18.9    19  19.1  19.2  19.3  19.4  19.5  19.6  19.7  19.8  19.9 
##    36    35    35    36    30    34    34    41    43    32    28    26    31 
##    20  20.1  20.2  20.3  20.4  20.5  20.6  20.7  20.8  20.9    21  21.1  21.2 
##    40    29    28    18    26    24    21    38    29    21    41    20    26 
##  21.3  21.4  21.5  21.6  21.7  21.8  21.9    22  22.1  22.2  22.3  22.4  22.5 
##    16    22    21    11    14    17    10    33    21    20    14    16    24 
##  22.6  22.7  22.8  22.9    23  23.1  23.2  23.3  23.4  23.5  23.6  23.7  23.8 
##    19    12    16    11    20    14    18    17    19    20    11     5    13 
##  23.9    24  24.1  24.2  24.3  24.4  24.5  24.6  24.7  24.8  24.9    25  25.1 
##    16    12    12    12    10     9    11    15    18    11    10    11    10 
##  25.2  25.3  25.4  25.5  25.6  25.7  25.8  25.9    26  26.1  26.2  26.3  26.4 
##     6     5    12    15     3    11    10     6    11     8     8    10     9 
##  26.5  26.6  26.7  26.8  26.9    27  27.1  27.2  27.3  27.4  27.5  27.6  27.7 
##    10    10    10     8     5    12     6    11     8    14     4     8     6 
##  27.8  27.9    28  28.1  28.2  28.3  28.4  28.5  28.6  28.7  28.8  28.9    29 
##     9     2    15     9    11     8     2     9     6    10     5     5     6 
##  29.1  29.2  29.3  29.4  29.5  29.6  29.7  29.8  29.9    30  30.1  30.2  30.3 
##     6     6     7     6     8    10    11     9     7    11     2     4     3 
##  30.4  30.5  30.6  30.7  30.8  30.9    31  31.1  31.2  31.3  31.4  31.5  31.6 
##     4    12    12     8     7    10     7     9    10     4     6     8     5 
##  31.7  31.8  31.9    32  32.1  32.2  32.3  32.4  32.5  32.6  32.7  32.8  32.9 
##     7     2     4     3     3     4     4     3     4     3     7     7     5 
##    33  33.1  33.2  33.3  33.4  33.5  33.6  33.7  33.8  33.9    34  34.1  34.2 
##     3     8     8     2     4     6     2     4     5     4     1     3     5 
##  34.3  34.4  34.5  34.6  34.7  34.8  34.9    35  35.1  35.2  35.3  35.4  35.5 
##     1     4     4     4     3     5     2     1     4     1     3     3     4 
##  35.6  35.7  35.8  35.9    36  36.1  36.2  36.3  36.4  36.7  36.8  36.9    37 
##     3     2     2     3     3     2     5     6     7     2     2     3     1 
##  37.1  37.2  37.3  37.4  37.5  37.6  37.7  37.8  37.9    38  38.1  38.3  38.4 
##     7     3     1     1     3     1     1     3     3     1     2     1     2 
##  38.5  38.6  38.7  38.9    39  39.1  39.2  39.3  39.4  39.5  39.6  39.7  39.8 
##     2     3     1     2     5     2     3     1     1     5     2     3     1 
##  39.9    40  40.1  40.2  40.3  40.4  40.5  40.6  40.7  40.9    41  41.1  41.2 
##     2     2     3     2     4     1     1     1     3     5     1     4     2 
##  41.3  41.4  41.5  41.6  41.7  41.8  41.9  42.2  42.3  42.8  43.1  43.3  43.4 
##     2     3     1     2     1     1     1     1     1     1     2     1     3 
##  43.5  43.6    44  44.2  44.3  44.4  44.5  44.7  44.8  45.1  45.3  45.4  45.5 
##     1     1     1     2     2     1     1     1     1     1     1     1     1 
##  45.7  45.8    46  46.3  46.4  46.5  46.7  47.1  47.2  47.4  47.5  47.9    48 
##     1     1     1     1     4     1     3     3     1     1     1     1     1 
##  48.1  48.2  48.8    49  49.3  49.4  49.6  50.1  50.2  50.6  50.7  50.9  51.3 
##     1     1     1     1     1     2     1     1     1     2     2     2     1 
##  52.3  52.4    53  53.1  54.7  55.7    57  57.6  57.7  58.2  58.8  59.1  60.4 
##     1     1     2     2     1     1     1     2     1     1     1     1     1 
##  60.5  62.2  62.6  63.4  66.1  68.4  68.5  70.1  70.3  71.2  73.9  75.1  77.3 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
##  77.4  81.3  83.7  91.1  97.3  98.9 103.5 104.5 120.9 
##     1     1     1     1     1     1     1     1     1

Same as 2004 data, I remove all the data that is less than 0

data_2019 <- data_2019[`Daily Mean PM2.5 Concentration`>=0]
summary(data_2019$`Daily Mean PM2.5 Concentration`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    4.00    6.50    7.78   10.00  120.90

STEP 2

Combine two datasets

data_2004[, Year := "2004"]
data_2019[, Year := "2019"]
all_data = rbind(data_2004, data_2019)
all_data = rename(all_data, concentration = `Daily Mean PM2.5 Concentration`)
all_data = rename(all_data, lat = SITE_LATITUDE)
all_data = rename(all_data, lon = SITE_LONGITUDE)

STEP 3

Creating a map

library(leaflet)
pal <- colorFactor(c("Blue","DarkOrange"), domain = all_data$Year)
leaflet(all_data) %>%
  addProviderTiles('CartoDB.Positron') %>% 
  addCircleMarkers(lat=~lat,lng=~lon, opacity=1, fillOpacity=1, radius=1,color = ~pal(all_data$Year))

The distribution of sites is radial centralized around two sites: Los Angeles and San Francisco. And more are located near the coastline than the inner land.

Step 4

Check for missing values

 sum(is.na(all_data$Date))
## [1] 0
 sum(is.na(all_data$concentration))
## [1] 0
 sum(is.na(all_data$`Site Name`))
## [1] 0
 sum(is.na(all_data$COUNTY))
## [1] 0
 sum(is.na(all_data$STATE))
## [1] 0
length(all_data$`Site Name`)
## [1] 72036
p = 331/72036; p
## [1] 0.004594925

In step 1, I have deleted all implausible pm2.5 concentration which is less than 0. And for this section, I find that 0.45% site names are missinng, so the plots based on site in Los Angeles may be less convincing.

Step 5

state

library(ggforce)
ggplot( 
  all_data[!is.na(concentration)], 
  mapping = aes( x = concentration, fill = Year)) +
  geom_histogram() +
  facet_zoom(x = concentration <70)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

From the graph, we can see that generally for California state, the concentration of PM2.5 is dropped from year 2004 to year 2019

county

ggplot( 
  all_data[!is.na(concentration)], 
  mapping = aes( y = concentration, x = COUNTY, color = Year)) +
  geom_point() +
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1))

From the graph, we can see that PM2.5 concentration decreases from 2004 to 2019 in most counties in California, except 8 counties such as Butte, Contra Costa and Manposa.

site in Los Angeles

LA_data = filter(all_data, COUNTY == "Los Angeles")
ggplot( 
  LA_data[!is.na(concentration)], 
  mapping = aes( y = concentration, x = `Site Name`, fill = Year)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1))

Based on sites in Los Angeles, excluding some sites with missing data, we can see that PM2.5 concentration decreases from 2004 to 2019.